source("Downstream.v2.R")
# loading libraries
library(Seurat)
library(rrrSingleCellUtils)
library(ggplot2)
library(msigdbr)
library(dplyr)
Load Seurat objects here so only have to complete once (commented out elsewhere).
# Create Seurat objects and perform initial QC. Label original source. osteoblasts
osteoblasts <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0031/outs/filtered_feature_bc_matrix/")
osteoblasts <- subset(osteoblasts, subset = nCount_RNA < 20000 & percent.mt < 10)
osteoblasts$src <- "osteoblasts"
osteoblasts$model <- "osteoblasts"
# OS17 Culture
os17_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0016/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os17_cx_raw <- subset(os17_cx_raw, subset = nFeature_RNA > 3500 & nCount_RNA < 50000 & percent.mt <
15)
os17_cx_raw$src <- "Culture"
os17_cx_raw$model <- "OS-17"
## Tibia
os17_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0018xS0028/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os17_tib_raw <- subset(os17_tib_raw, subset = nFeature_RNA > 3000 & nCount_RNA < 60000 & percent.mt <
18)
os17_tib_raw$src <- "Tibia"
os17_tib_raw$model <- "OS-17"
## Lung double check usage throughout rest of code - Emily (fixed issue where coming from
## wrong file)
os17_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home2/gdrobertslab/lab/Counts/S0024xS0029/outs/filtered_feature_bc_matrix",
species_pattern = "^hg19-")
os17_lung_raw <- subset(os17_lung_raw, subset = nFeature_RNA > 1250 & nCount_RNA < 60000 & percent.mt <
25)
os17_lung_raw$src <- "Lung"
os17_lung_raw$model <- "OS-17"
os17_cx <- NormalizeData(os17_cx_raw) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os17_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 117232
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(os17_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)
# Regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os17_cx <- CellCycleScoring(object = os17_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os17_cx <- ScaleData(object = os17_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17_cx),
block.size = 10000)
os17_cx <- RunPCA(os17_cx, pc.genes = os17_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8908
## Number of communities: 5
## Elapsed time: 0 seconds
# Find optimal clustering resolution
os17_cx_nc <- nRes(os17_cx, res = seq(from = 0.1, to = 0.2, by = 0.05))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 5
## Elapsed time: 0 seconds
plot <- pSil(os17_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds
## NULL
plot
## NULL
# With res = 0.15, cells in cluster 3 do not have any significantly different genes that are
# deferentially regulated Therefore, we decided to proceed with res = 0.1
os17_cx <- FindClusters(os17_cx, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds
if (!file.exists("Data")) {
dir.create("Data")
}
save(os17_cx, file = "Data/os17_cx_CCR.RData")
# Subset all to 2800 cells in each condition
os17_cx <- subset(os17_cx_raw, cells = sample(Cells(os17_cx_raw), 2800))
os17.tib <- subset(os17_tib_raw, cells = sample(Cells(os17_tib_raw), 2800))
os17.lung <- subset(os17_lung_raw, cells = sample(Cells(os17_lung_raw), 2800))
# Merge into a single Seurat object
os17 <- merge(os17_cx, y = c(os17.tib, os17.lung), add.cell_ids = c("Culture", "Tibia", "Lung"),
project = "LineageTracing")
# Process and cluster
os17 <- NormalizeData(os17) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os17@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8400
## Number of edges: 289959
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9048
## Number of communities: 8
## Elapsed time: 0 seconds
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os17 <- CellCycleScoring(object = os17, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os17 <- ScaleData(object = os17, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17),
block.size = 10000)
os17 <- RunPCA(os17, pc.genes = os17@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8400
## Number of edges: 280874
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8977
## Number of communities: 7
## Elapsed time: 0 seconds
DimPlot(os17, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS17 Clusters") +
scale_color_npg(alpha = 0.7)
# Plot the data
set.seed(100)
cell_ids <- sample(colnames(os17))
DimPlot(os17, reduction = "umap", group.by = "src", pt.size = 1, label = F, order = cell_ids) +
coord_fixed() + ggtitle("OS17 by Source") + scale_color_npg()
save(os17, file = "Data/os17.RData")
# t143b Culture
t143b_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0017/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
t143b_cx_raw <- subset(t143b_cx_raw, subset = nFeature_RNA > 4000 & nCount_RNA < 74000 & percent.mt <
17)
t143b_cx_raw$src <- "Culture"
t143b_cx_raw$model <- "143B"
## Tibia
t143b_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0052/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
t143b_tib_raw <- subset(t143b_tib_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
22)
t143b_tib_raw$src <- "Tibia"
t143b_tib_raw$model <- "143B"
# Lung
t143b_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0019-143B-lung/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
t143b_lung_raw <- subset(t143b_lung_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
22)
t143b_lung_raw$src <- "Lung"
t143b_lung_raw$model <- "143B"
# NCHOS2 Flank
os2_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0076/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os2_cx_raw <- subset(os2_cx_raw, subset = nCount_RNA < 36000 & percent.mt < 50)
os2_cx_raw$src <- "Flank"
os2_cx_raw$model <- "NCH-OS-2"
## Tibia
os2_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0042/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os2_tib_raw <- subset(os2_tib_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 40000 & percent.mt <
20)
os2_tib_raw$src <- "Tibia"
os2_tib_raw$model <- "NCH-OS-2"
## Lung
os2_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0041/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os2_lung_raw <- subset(os2_lung_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 60000 & percent.mt <
30)
os2_lung_raw$src <- "Lung"
os2_lung_raw$model <- "NCH-OS-2"
# NCHOS7 Flank
os7_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0043/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os7_cx_raw <- subset(os7_cx_raw, subset = nCount_RNA < 50000 & percent.mt < 25)
os7_cx_raw$src <- "Flank"
os7_cx_raw$model <- "NCH-OS-7"
## Tibia
os7_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0034/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os7_tib_raw <- subset(os7_tib_raw, subset = nFeature_RNA > 2000 & nCount_RNA < 35000 & percent.mt <
14)
os7_tib_raw$src <- "Tibia"
os7_tib_raw$model <- "NCH-OS-7"
## Lung
os7_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0055/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os7_lung_raw <- subset(os7_lung_raw, subset = nCount_RNA < 42000 & percent.mt < 20)
os7_lung_raw$src <- "Lung"
os7_lung_raw$model <- "NCH-OS-7"
os7_cx <- NormalizeData(os7_cx_raw) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os7_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 68728
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds
DimPlot(os7_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)
# Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os7_cx <- CellCycleScoring(object = os7_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os7_cx <- ScaleData(object = os7_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os7_cx),
block.size = 10000)
os7_cx <- RunPCA(os7_cx, pc.genes = os7_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8676
## Number of communities: 5
## Elapsed time: 0 seconds
# Find optimal clustering resolution
os7_cx_nc <- nRes(os7_cx, res = seq(from = 0.1, to = 0.15, by = 0.01))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9248
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9195
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9123
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9095
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds
plot <- pSil(os7_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds
## NULL
plot
## NULL
os7_cx <- FindClusters(os7_cx, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds
save(os7_cx, file = "Data/os7_cx_CCR.RData")
# Save raw objects
# Make lists for easy loop saving
sample_list <- c(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw)
sample_names <- c("osteoblasts", "os17_cx_raw", "os17_tib_raw", "os17_lung_raw", "t143b_cx_raw",
"t143b_tib_raw", "t143b_lung_raw", "os2_cx_raw", "os2_tib_raw", "os2_lung_raw", "os7_cx_raw",
"os7_tib_raw", "os7_lung_raw")
# Correlate sample names and sample labels
names(sample_list) <- sample_names
# Save each raw object
for (item in sample_names) {
# Save each sample as an individual Seurat object with proper name
assign(item, sample_list[[item]])
save(list = item, file = paste("Data/", item, ".RData", sep = ""))
}
Merge objects, process, and cell cycle regress.
# Merge into a single Seurat object
OS <- merge(osteoblasts, y = c(os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw),
add.cell_ids = c("Osteoblasts", "OS-17_Culture", "OS-17_Tibia", "OS-17_Lung", "143B_Culture",
"143B_Tibia", "143B_Lung", "NCH-OS-2_Flank", "NCH-OS-2_Tibia", "NCH-OS-2_Lung", "NCH-OS-7_Flank",
"NCH-OS-7_Tibia", "NCH-OS-7_Lung"), project = "Heterogeneity")
# Process and cluster
OS <- NormalizeData(OS) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os.17@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 54300
## Number of edges: 1853982
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9614
## Number of communities: 10
## Elapsed time: 14 seconds
# rm(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
# t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw,
# os7_lung_raw)
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
OS <- CellCycleScoring(object = OS, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
OS <- ScaleData(object = OS, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(OS),
block.size = 10000)
OS <- RunPCA(OS, pc.genes = OS@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 54300
## Number of edges: 1824736
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9610
## Number of communities: 9
## Elapsed time: 14 seconds
save(OS, file = "Data/OS_merged_postCCR.RData")
OS$model <- as.factor(OS$model)
OS$model <- factor(as.factor(OS$model), levels = c("OS-17", "143B", "NCH-OS-2", "NCH-OS-7", "Osteoblasts"))
# Plot the data
DimPlot(OS, reduction = "umap", group.by = "model", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Model") +
scale_color_npg(alpha = 1)
DimPlot(OS, reduction = "umap", group.by = "src", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Source")
DimPlot(OS, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Clusters") +
scale_color_npg(alpha = 0.7)
List objects, process, and cell cycle regress. Used in Figure 3B - ITH code.
# Create List to compute ITH scores (subset to 1500 cells for each)
OS.list_1500 <- list(osteoblasts = osteoblasts, OS17.cx = os17_cx_raw, OS17.Tibia = os17_tib_raw,
OS17.Lung = os17_lung_raw, t143B.cx = t143b_cx_raw, t143B.Tibia = t143b_tib_raw, t143B.Lung = t143b_lung_raw,
OS2.Flank = os2_cx_raw, OS2.Tibia = os2_tib_raw, OS2.Lung = os2_lung_raw, OS7.Flank = os7_cx_raw,
OS7.Tibia = os7_tib_raw, OS7.Lung = os7_lung_raw)
for (i in 1:length(OS.list_1500)) {
OS.list_1500[[i]] <- NormalizeData(OS.list_1500[[i]]) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3) %>%
subset(cells = sample(Cells(OS.list_1500[[i]]), 1500))
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5735
## Number of edges: 201174
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8642
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 117232
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2849
## Number of edges: 103732
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4574
## Number of edges: 156586
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8373
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3363
## Number of edges: 123143
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8215
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6763
## Number of edges: 226939
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8477
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5134
## Number of edges: 181392
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8631
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6749
## Number of edges: 222345
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8705
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4285
## Number of edges: 148107
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8520
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1648
## Number of edges: 57506
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8356
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 68728
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2891
## Number of edges: 102969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8567
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5133
## Number of edges: 177023
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8680
## Number of communities: 9
## Elapsed time: 0 seconds
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
for (i in 1:length(OS.list_1500)) {
OS.list_1500[[i]] <- CellCycleScoring(object = OS.list_1500[[i]], s.features = s.genes, g2m.features = g2m.genes,
set.ident = TRUE)
OS.list_1500[[i]] <- ScaleData(object = OS.list_1500[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
features = VariableFeatures(OS.list_1500[[i]]), block.size = 10000) # Change to VariableFeatures()
OS.list_1500[[i]] <- RunPCA(OS.list_1500[[i]], pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 61069
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8390
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 61975
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8650
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56704
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8091
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56977
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7771
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 58610
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7691
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 60734
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7791
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56073
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7987
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 52995
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8059
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 52729
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7782
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 51652
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7883
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 51919
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 55164
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8026
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56741
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8093
## Number of communities: 4
## Elapsed time: 0 seconds
save(OS.list_1500, file = "Data/OSlist_1500_CCR.RData")
List objects, process, and cell cycle regress. Used in Figure 3 C-D code.
# Create list for usage in Figure 3
tibia_list <- list(OS17.Tibia = os17_tib_raw, t143B.Tibia = t143b_tib_raw, OS2.Tibia = os2_tib_raw,
OS7.Tibia = os7_tib_raw)
lung_list <- list(OS17.Lung = os17_lung_raw, t143B.Lung = t143b_lung_raw, OS2.Lung = os2_lung_raw,
OS7.Lung = os7_lung_raw)
OS_list <- list()
for (i in 1:length(tibia_list)) {
message(i, head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1))
# Subset number of cells in each Seurat object to the least in the group
a <- table(tibia_list[[i]]$orig.ident)
b <- table(lung_list[[i]]$orig.ident)
list <- c(a, b)
min <- min(list)
tibia_list[[i]] <- subset(tibia_list[[i]], cells = sample(Cells(tibia_list[[i]]), min))
lung_list[[i]] <- subset(lung_list[[i]], cells = sample(Cells(lung_list[[i]]), min))
title <- head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1)
title <- gsub("-", "", title)
OS_list[[title]] <- merge(tibia_list[[i]], y = c(lung_list[[i]]), add.cell_ids = c(head(paste(tibia_list[[i]]$model,
"_", tibia_list[[i]]$src, sep = ""), 1), head(paste(lung_list[[i]]$model, "_", lung_list[[i]]$src,
sep = ""), 1)), project = "Heterogeneity")
}
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
for (i in 1:length(OS_list)) {
OS_list[[i]] <- NormalizeData(OS_list[[i]]) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
OS_list[[i]] <- CellCycleScoring(object = OS_list[[i]], s.features = s.genes, g2m.features = g2m.genes,
set.ident = TRUE)
OS_list[[i]] <- ScaleData(object = OS_list[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
features = VariableFeatures(OS_list[[i]]), block.size = 10000) # Change to VariableFeatures()
OS_list[[i]] <- RunPCA(OS_list[[i]], pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5698
## Number of edges: 195555
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8596
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5698
## Number of edges: 190346
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8444
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10268
## Number of edges: 352757
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8850
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10268
## Number of edges: 343259
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8657
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3296
## Number of edges: 114815
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3296
## Number of edges: 110588
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8283
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5782
## Number of edges: 203134
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8971
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5782
## Number of edges: 201063
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8855
## Number of communities: 5
## Elapsed time: 0 seconds
save(OS_list, file = "Data/OS_list_CCR.RData")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux 8.4 (Ootpa)
##
## Matrix products: default
## BLAS: /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRblas.so
## LAPACK: /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cluster_2.1.4 msigdbr_7.5.1 rrrSingleCellUtils_0.5.0
## [4] nichenetr_1.1.0 RColorBrewer_1.1-3 ggplot2_3.3.6
## [7] reshape2_1.4.4 sp_1.4-7 SeuratObject_4.1.0
## [10] Seurat_4.1.1 reticulate_1.25 Matrix_1.5-3
## [13] dplyr_1.0.9 cowplot_1.1.1 ggsci_2.9
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 tidyselect_1.1.2 htmlwidgets_1.5.4
## [4] grid_4.1.0 Rtsne_0.16 pROC_1.18.0
## [7] munsell_0.5.0 codetools_0.2-18 ica_1.0-3
## [10] future_1.29.0 miniUI_0.1.1.1 withr_2.5.0
## [13] spatstat.random_3.0-1 colorspace_2.0-3 progressr_0.11.0
## [16] highr_0.9 knitr_1.39 rstudioapi_0.13
## [19] stats4_4.1.0 ROCR_1.0-11 tensor_1.5
## [22] listenv_0.8.0 labeling_0.4.2 polyclip_1.10-4
## [25] farver_2.1.1 parallelly_1.32.1 vctrs_0.4.1
## [28] generics_0.1.3 ipred_0.9-12 xfun_0.31
## [31] randomForest_4.7-1.1 R6_2.5.1 doParallel_1.0.17
## [34] clue_0.3-63 bitops_1.0-7 spatstat.utils_3.0-1
## [37] assertthat_0.2.1 promises_1.2.0.1 scales_1.2.0
## [40] nnet_7.3-18 rgeos_0.5-9 gtable_0.3.1
## [43] globals_0.16.2 goftest_1.2-3 timeDate_3043.102
## [46] rlang_1.0.2 GlobalOptions_0.1.2 splines_4.1.0
## [49] lazyeval_0.2.2 ModelMetrics_1.2.2.2 spatstat.geom_3.0-3
## [52] checkmate_2.1.0 yaml_2.3.5 abind_1.4-5
## [55] backports_1.4.1 httpuv_1.6.6 Hmisc_4.7-0
## [58] caret_6.0-92 DiagrammeR_1.0.9 tools_4.1.0
## [61] lava_1.6.10 ellipsis_0.3.2 spatstat.core_2.4-4
## [64] jquerylib_0.1.4 proxy_0.4-27 BiocGenerics_0.40.0
## [67] ggridges_0.5.3 Rcpp_1.0.9 plyr_1.8.8
## [70] base64enc_0.1-3 visNetwork_2.1.0 purrr_0.3.4
## [73] rpart_4.1.16 deldir_1.0-6 pbapply_1.6-0
## [76] GetoptLong_1.0.5 S4Vectors_0.32.4 zoo_1.8-10
## [79] ggrepel_0.9.1 magrittr_2.0.3 RSpectra_0.16-1
## [82] data.table_1.14.6 scattermore_0.8 circlize_0.4.15
## [85] lmtest_0.9-40 RANN_2.6.1 fitdistrplus_1.1-8
## [88] matrixStats_0.63.0 hms_1.1.1 patchwork_1.1.1
## [91] mime_0.12 evaluate_0.18 xtable_1.8-4
## [94] jpeg_0.1-9 IRanges_2.28.0 gridExtra_2.3
## [97] shape_1.4.6 compiler_4.1.0 tibble_3.1.7
## [100] KernSmooth_2.23-20 crayon_1.5.2 htmltools_0.5.2
## [103] mgcv_1.8-41 later_1.3.0 tzdb_0.3.0
## [106] Formula_1.2-4 tidyr_1.2.0 lubridate_1.8.0
## [109] DBI_1.1.3 formatR_1.12 ComplexHeatmap_2.10.0
## [112] MASS_7.3-58.1 babelgene_22.3 readr_2.1.2
## [115] cli_3.4.1 parallel_4.1.0 gower_1.0.0
## [118] igraph_1.3.1 pkgconfig_2.0.3 foreign_0.8-83
## [121] plotly_4.10.0 spatstat.sparse_3.0-0 recipes_0.2.0
## [124] foreach_1.5.2 bslib_0.3.1 hardhat_0.2.0
## [127] prodlim_2019.11.13 stringr_1.4.0 digest_0.6.30
## [130] sctransform_0.3.3 RcppAnnoy_0.0.20 spatstat.data_3.0-0
## [133] rmarkdown_2.14 leiden_0.4.2 htmlTable_2.4.0
## [136] uwot_0.1.11 shiny_1.7.1 rjson_0.2.21
## [139] lifecycle_1.0.1 nlme_3.1-160 jsonlite_1.8.3
## [142] viridisLite_0.4.0 limma_3.50.3 fansi_1.0.3
## [145] pillar_1.7.0 lattice_0.20-45 fastmap_1.1.0
## [148] httr_1.4.4 survival_3.3-1 glue_1.6.2
## [151] fdrtool_1.2.17 png_0.1-7 iterators_1.0.14
## [154] class_7.3-20 stringi_1.7.6 sass_0.4.1
## [157] latticeExtra_0.6-29 caTools_1.18.2 irlba_2.3.5.1
## [160] e1071_1.7-12 future.apply_1.9.0